﻿using HtmlAgilityPack;
using Sgml;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Mvc;
using System.Xml;
using System.Xml.XPath;
using Wei.Attributes;

namespace Wei.Controllers
{

    /// <summary>
    /// One row of the novel listing scraped by <c>CrawlController.GetXiaoShuo</c>
    /// and serialised to the client as JSON.
    /// </summary>
    public class Info
    {
        /// <summary>Numeric identifier. Not assigned by any active code in this file.</summary>
        public int Id { get; set; }
        /// <summary>Text of the first table cell of the listing row.</summary>
        public string Name { get; set; }
        /// <summary>Text of the fourth table cell of the listing row (kept as a string).</summary>
        public string Count { get; set; }
        /// <summary>Text of the fifth table cell of the listing row.</summary>
        public string UpdateNew { get; set; }
        /// <summary>Text of the sixth table cell of the listing row.</summary>
        public string Status { get; set; }
        /// <summary>Text of the third table cell; presumably the author ("zuozhe") - confirm against the site.</summary>
        public string ZuoZhe { get; set; }
        /// <summary>Value of the first attribute of the first child node of the second table cell (expected to be a link).</summary>
        public string Content_Url { get; set; }
        /// <summary>List of categories. Not populated by any code in this file.</summary>
        public List<Novel_Type> novel_List { get; set; }
    }
    /// <summary>
    /// A category link scraped from the site menu by <c>CrawlController.GetNovelType</c>.
    /// </summary>
    public class Novel_Type
    {
        /// <summary>Link text of the menu anchor.</summary>
        public string Name { get; set; }
        /// <summary>Site base URL concatenated with the anchor's <c>href</c> value.</summary>
        public string Url { get; set; }
    }

    

    //[LoginVerifyFilter]
    public class CrawlController : BaseController
    {
        private string novel_url = "https://www.23us.so/";
        /// <summary>
        /// GET: Crawl. Renders the default view of this controller.
        /// </summary>
        /// <returns>The view result produced by <c>View()</c>.</returns>
        public ActionResult Index()
        {
            ActionResult page = View();
            return page;
        }

        /// <summary>
        /// Downloads the first "last update" ranking page, isolates the
        /// <c>main m_menu</c> div and turns every anchor inside it - except the
        /// first one - into a <see cref="Novel_Type"/>.
        /// </summary>
        /// <returns>JSON of the form <c>{ data: [ { Name, Url }, ... ] }</c>; the list is empty when nothing matched.</returns>
        public JsonResult GetNovelType()
        {
            string page = GetHtml("https://www.23us.so/top/lastupdate_1.html");

            // Narrow the search to the menu container so only its anchors are picked up.
            string menuHtml = Regex.Match(page, @"<div class=""main m_menu"">(.|\n)*?</div>").Value;

            // Group 1 = href, group 2 = link text.
            Regex anchorPattern = new Regex("<a[^>]+?href=\"([^\"]+)\"[^>]*>([^<]+)</a>", RegexOptions.Compiled);

            List<Novel_Type> categories = anchorPattern.Matches(menuHtml)
                .Cast<Match>()
                .Skip(1) // the first anchor of the menu is not a category entry
                .Select(anchor => new Novel_Type
                {
                    Name = anchor.Groups[2].Value,
                    Url = novel_url + anchor.Groups[1].Value
                })
                .ToList();

            return Json(new { data = categories }, JsonRequestBehavior.AllowGet);
        }

        /// <summary>
        /// Scrapes the "last update" ranking page(s) and returns one <see cref="Info"/>
        /// per listing row (rows are the <c>tr</c> elements with <c>bgcolor="#FFFFFF"</c>).
        /// </summary>
        /// <remarks>
        /// Scraping is best effort: a page that cannot be fetched or parsed contributes
        /// no rows, and a row that does not have the expected shape is skipped on its own
        /// instead of discarding the remaining rows of the page.
        /// </remarks>
        /// <returns>
        /// JSON of the form <c>{ code: "0", msg: "", count: "10000", data: [...] }</c>.
        /// <c>count</c> is a fixed placeholder, not the number of rows returned.
        /// </returns>
        public JsonResult GetXiaoShuo()
        {
            string url = "https://www.23us.so/top/lastupdate_{0}.html";
            List<Info> list = new List<Info>();

            // Only page 1 is crawled at the moment (upper bound is exclusive).
            for (int i = 1; i < 2; i++)
            {
                try
                {
                    HtmlDocument html_Doc = new HtmlDocument();
                    html_Doc.LoadHtml(GetHtml(string.Format(url, i.ToString())));

                    // ToArray() snapshots the nodes so they can be removed while iterating.
                    foreach (var script in html_Doc.DocumentNode.Descendants("script").ToArray())
                    {
                        script.Remove();
                    }
                    foreach (var style in html_Doc.DocumentNode.Descendants("style").ToArray())
                    {
                        style.Remove();
                    }

                    HtmlNode node = html_Doc.GetElementbyId("content");
                    if (node == null)
                    {
                        // Page layout not as expected: nothing to extract from this page.
                        continue;
                    }

                    // NOTE(review): the XPath starts with "//", so it is evaluated against the
                    // whole document rather than only the descendants of #content.
                    // SelectNodes returns null (not an empty collection) when nothing matches.
                    HtmlNodeCollection rows = node.SelectNodes("//tr[@bgcolor=\"#FFFFFF\"]");
                    if (rows == null)
                    {
                        continue;
                    }

                    foreach (HtmlNode row in rows)
                    {
                        Info info = ParseListingRow(row);
                        if (info != null)
                        {
                            list.Add(info);
                        }
                    }
                }
                catch (Exception)
                {
                    // Deliberate best effort (network failure, unexpected markup):
                    // keep whatever was collected so far and move on to the next page.
                    continue;
                }
            }

            var res = new { code = "0", msg = "", count = "10000", data = list };
            return Json(res, JsonRequestBehavior.AllowGet);
        }

        /// <summary>
        /// Converts one listing row into an <see cref="Info"/>, or returns null when the
        /// row does not contain six cells with a child node carrying at least one
        /// attribute in the second cell.
        /// </summary>
        private static Info ParseListingRow(HtmlNode row)
        {
            HtmlNodeCollection cells = row.SelectNodes("./td");
            if (cells == null || cells.Count < 6)
            {
                return null;
            }

            // The second cell is expected to start with the link to the novel content.
            HtmlNode link = cells[1].FirstChild;
            if (link == null || link.Attributes.Count == 0)
            {
                return null;
            }

            Info info = new Info();
            info.Name = cells[0].InnerText;
            info.Content_Url = link.Attributes[0].Value;
            info.ZuoZhe = cells[2].InnerText;
            info.Count = cells[3].InnerText;
            info.UpdateNew = cells[4].InnerText;
            info.Status = cells[5].InnerText;
            return info;
        }
        

        //public JsonResult GetXiaoShuo()
        //{
        //    string html = HttpGet("https://www.23us.so/top/lastupdate_1.html", "");
            
        //    //获取小说信息
        //    Regex reg_table = new Regex(@"<table style=""width:736px;"" cellpadding=""0"" cellspacing=""1"" bgcolor=""#E4E4E4"">(.|\n)*?</table>");
        //    var mat_info = reg_table.Match(html);
        //    var info = mat_info.Groups[0].ToString();
        //    Regex tmpreg = new Regex("<a[^>]+?href=\"([^\"]+)\"[^>]*>([^<]+)</a>", RegexOptions.Compiled);
        //    MatchCollection sMC = tmpreg.Matches(info);

        //    Random rand = new Random();
        //    List<Info> list = new List<Info>();
        //    if (sMC.Count != 0)
        //    {
        //        for (int i = 0; i < sMC.Count; i++)
        //        {
        //            Info inf = new Info();
        //            inf.Id = i;
        //            inf.Name = sMC[i].Groups[2].Value;
        //            inf.ZuoZhe = sMC[i].Groups[3].Value;
        //            inf.Count = rand.Next(9999,99999).ToString();
        //            inf.UpdateNew = DateTime.Now.ToString("yyyy-MM-dd");
        //            inf.Status = "正在连载";
        //            list.Add(inf);
        //        }
        //    }
           
        //    var res = new { code = "0", msg = "", count = "10000", data= list };
        //    return Json(res, JsonRequestBehavior.AllowGet);
        //}

        /// <summary>
        /// Appends a fixed block of filler text to <c>~/log/log1.txt</c> unless the file
        /// has already grown beyond 100000 bytes.
        /// </summary>
        /// <returns>
        /// <c>"够大了"</c> when the existing file is larger than 100000 bytes (nothing is written);
        /// otherwise a file size as text: the size measured BEFORE appending when the file
        /// already existed, or the size AFTER writing when the file was just created.
        /// </returns>
        public string Get_MP3()
        {
            // Filler payload; its content is irrelevant, it only makes the file grow.
            string filler = @"
/** 
* lowerValue 最小值 
* upperValue 最大值 
*/ 
function selectFrom(lowerValue, upperValue){ 

//取值范围总数 
var choices = upperValue - lowerValue + 1; 
return Math.floor(Math.random() * choices + lowerValue); 
} 
var num = selectFrom(2, 10); 
alert(num);//介于2和10之间（包括2和10）的一个数值 


var color = colors[selectFrom(0, colors.length - 1)];
            alert(color);//可能是数组中包含的任何一个值 
/** 
* lowerValue 最小值 
* upperValue 最大值 
*/ 
function selectFrom(lowerValue, upperValue){ 

//取值范围总数 
var choices = upperValue - lowerValue + 1; 
return Math.floor(Math.random() * choices + lowerValue); 
} 
var num = selectFrom(2, 10); 
alert(num);//介于2和10之间（包括2和10）的一个数值 


var color = colors[selectFrom(0, colors.length - 1)];
            alert(color);//可能是数组中包含的任何一个值 
/** 
* lowerValue 最小值 
* upperValue 最大值 
*/ 
function selectFrom(lowerValue, upperValue){ 

//取值范围总数 
var choices = upperValue - lowerValue + 1; 
return Math.floor(Math.random() * choices + lowerValue); 
} 
var num = selectFrom(2, 10); 
alert(num);//介于2和10之间（包括2和10）的一个数值 


var color = colors[selectFrom(0, colors.length - 1)];
            alert(color);//可能是数组中包含的任何一个值 
            ";

            string logPath = Server.MapPath("~/log/log1.txt");
            bool alreadyThere = System.IO.File.Exists(logPath);
            long reportedSize = 0;

            if (alreadyThere)
            {
                reportedSize = new FileInfo(logPath).Length;
                if (reportedSize > 100000)
                {
                    // Large enough already: report that instead of growing the file further.
                    return "够大了";
                }
            }

            System.IO.File.AppendAllText(logPath, filler);

            if (!alreadyThere)
            {
                // Freshly created file: report its size after the first write.
                reportedSize = new FileInfo(logPath).Length;
            }

            return reportedSize.ToString();
        }

        /// <summary>
        /// Reports the progress of the download tracked in the session keys
        /// <c>Novel_path</c> (output file) and <c>Novel_size</c> (set by <c>Get_Data</c>
        /// to the number of chapter links).
        /// </summary>
        /// <returns>
        /// <c>"获取完成"</c> once the output file is larger than <c>Novel_size * 8000</c> bytes
        /// (the session keys are reset at that point); otherwise
        /// <c>"&lt;file size in bytes&gt;;&lt;Novel_size&gt;"</c>, where either value is 0 when unknown.
        /// </returns>
        public string get_novel_size()
        {
            long bytesOnDisk = 0;
            int chapterCount = 0;

            bool tracking = Session["Novel_path"] != null && Session["Novel_size"] != null;
            if (tracking)
            {
                chapterCount = Convert.ToInt32(Session["Novel_size"]);
            }

            if (tracking && chapterCount > 0)
            {
                string target = Session["Novel_path"].ToString();
                if (System.IO.File.Exists(target))
                {
                    bytesOnDisk = new FileInfo(target).Length;

                    // Completion is judged by file size against 8000 bytes per chapter.
                    bool finished = bytesOnDisk > (chapterCount * 8000);
                    if (finished)
                    {
                        Session["Novel_path"] = "";
                        Session["Novel_size"] = 0;
                        return "获取完成";
                    }
                }
            }

            return bytesOnDisk.ToString() + ";" + chapterCount.ToString();
        }


        /// <summary>
        /// Downloads a remote file and hands it to the client as <c>audio/mpeg</c>.
        /// </summary>
        /// <param name="filePath">
        /// File address. Only tested for null: the value is then overwritten with a fixed
        /// URL, so the caller-supplied address is never actually fetched.
        /// </param>
        /// <returns>
        /// The file result, or null when <paramref name="filePath"/> is null or when the
        /// request cannot be created or the download fails.
        /// </returns>
        public ActionResult DownLoadFile(string filePath)
        {
            if (filePath == null)
            {
                return null;
            }

            // The address is pinned to a fixed resource regardless of the argument.
            filePath = "http://www.chenjinwei.com/321.mp3";

            // Everything after the last '/' is used as the download file name.
            string fileName = filePath.Substring(filePath.LastIndexOf("/") + 1);

            try
            {
                // Creating the request object first rejects addresses that cannot be used at all.
                WebRequest.Create(filePath);

                using (WebClient downloader = new WebClient())
                {
                    byte[] payload = downloader.DownloadData(filePath);
                    return File(payload, "audio/mpeg", fileName);
                }
            }
            catch (Exception)
            {
                // Any failure is reported to the framework as "no result".
                return null;
            }
        }

        /// <summary>
        /// Crawls a novel: downloads the index page at <paramref name="crawl_key"/>, follows
        /// every chapter link found in the table with id <c>at</c> and appends each chapter
        /// (title banner plus text) to <c>text/&lt;name&gt;&lt;yyyyMMdd&gt;.txt</c> under the
        /// application base directory.
        /// </summary>
        /// <remarks>
        /// The output path and the number of chapter links are stored in the session keys
        /// <c>Novel_path</c> and <c>Novel_size</c> so that <c>get_novel_size</c> can report progress.
        /// </remarks>
        /// <param name="crawl_key">
        /// URL of the novel's index page, e.g. https://www.23us.so/files/article/html/0/326/index.html
        /// </param>
        /// <returns>
        /// The relative path <c>text/&lt;file&gt;.txt</c> when the output file exists afterwards,
        /// otherwise <c>"fail"</c> (also returned for a null or blank <paramref name="crawl_key"/>).
        /// </returns>
        public string Get_Data(string crawl_key)
        {
            if (string.IsNullOrWhiteSpace(crawl_key))
            {
                return "fail";
            }

            string html = GetHtml(crawl_key);

            // Novel name: first comma-separated entry of the keywords meta tag, plus today's date.
            Match ma_name = Regex.Match(html, @"<meta name=""keywords"".+content=""(.+)""/>");
            string name = ma_name.Groups[1].Value.Split(',')[0] + DateTime.Now.ToString("yyyyMMdd");

            // The name comes from the remote page and ends up in a file path: replace every
            // character that is not valid in a file name (this includes path separators).
            foreach (char invalid in System.IO.Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalid, '_');
            }

            // Table of contents.
            Regex reg_mulu = new Regex(@"<table cellspacing=""1"" cellpadding=""0"" bgcolor=""#E4E4E4"" id=""at"">(.|\n)*?</table>");
            string mulu = reg_mulu.Match(html).Groups[0].ToString();

            // Chapter links inside the table of contents:
            //   group 0 = whole anchor, group 1 = chapter URL, group 2 = chapter title.
            Regex tmpreg = new Regex("<a[^>]+?href=\"([^\"]+)\"[^>]*>([^<]+)</a>", RegexOptions.Compiled);
            MatchCollection sMC = tmpreg.Matches(mulu);

            // Text file output location.
            string path = AppDomain.CurrentDomain.BaseDirectory.Replace("\\", "/") + "text/";
            string file_path = path + name + ".txt";
            Session["Novel_path"] = file_path;
            Session["Novel_size"] = sMC.Count.ToString();

            // Chapter body pattern; built once because it does not change between chapters.
            Regex reg = new Regex(@"<dd id=""contents"">(.|\n)*?</dd>");

            // Walk the chapter URLs and append each chapter's text to the output file.
            for (int i = 0; i < sMC.Count; i++)
            {
                string title = sMC[i].Groups[2].Value;
                string html_z = GetHtml(sMC[i].Groups[1].Value);

                // Strip the wrapper element and turn the HTML line breaks into real ones.
                string content = reg.Match(html_z).Groups[0].ToString()
                    .Replace("<dd id=\"contents\">", "")
                    .Replace("</dd>", "")
                    .Replace("&nbsp;", "")
                    .Replace("<br />", "\r\n");

                Novel("========================" + title + "=============================" + "\r\n" + content, name, path);
            }

            if (!System.IO.File.Exists(file_path))
            {
                return "fail";
            }

            return "text/" + name + ".txt";
        }

        /// <summary>
        /// Appends a block of text to <c>path + name + ".txt"</c>, creating the directory
        /// and the file when they do not exist yet.
        /// </summary>
        /// <param name="content">Text to append; a blank line is written after it.</param>
        /// <param name="name">File name without the <c>.txt</c> extension.</param>
        /// <param name="path">Target directory; concatenated as-is, so it is expected to end with a separator.</param>
        public void Novel(string content, string name, string path)
        {
            // No-op when the directory already exists.
            Directory.CreateDirectory(path);

            // Append mode creates the file on first use; "using" guarantees the handle is
            // released even when writing fails part-way.
            using (StreamWriter writer = new StreamWriter(path + name + ".txt", true))
            {
                writer.WriteLine(content + "\r\n");
            }
        }

        /// <summary>
        /// Sends <paramref name="postDataStr"/> as an <c>application/x-www-form-urlencoded</c>
        /// POST body (encoded as gb2312) and returns the response body decoded as UTF-8.
        /// </summary>
        /// <param name="Url">Target address.</param>
        /// <param name="postDataStr">Already form-encoded body; null is sent as an empty body.</param>
        /// <returns>The response body as text.</returns>
        /// <exception cref="WebException">The request failed or the server returned an error status.</exception>
        public string HttpPost(string Url, string postDataStr)
        {
            // Encode exactly once so that Content-Length always matches the bytes that are
            // written (measuring with UTF-8 but writing gb2312 disagrees for non-ASCII text).
            byte[] body = Encoding.GetEncoding("gb2312").GetBytes(postDataStr ?? string.Empty);

            CookieContainer cookie = new CookieContainer();
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded";
            request.ContentLength = body.Length;
            request.CookieContainer = cookie;

            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(body, 0, body.Length);
            }

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                response.Cookies = cookie.GetCookies(response.ResponseUri);

                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding("utf-8")))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and returns the body as UTF-8 text, transparently
        /// un-gzipping it when the payload is gzip-compressed.
        /// </summary>
        /// <param name="url">Address to fetch.</param>
        /// <returns>The response text, or the exception message when anything fails.</returns>
        public string Get_Data_By_Url(string url)
        {
            try
            {
                WebRequest request = WebRequest.Create(url);

                byte[] payload;
                using (WebResponse response = request.GetResponse())
                using (Stream raw = response.GetResponseStream())
                using (MemoryStream buffer = new MemoryStream())
                {
                    raw.CopyTo(buffer);
                    payload = buffer.ToArray();
                }

                // The response may or may not be compressed. Decide from the gzip magic
                // number (1F 8B) rather than assuming, so plain responses are readable too.
                bool gzipped = payload.Length >= 2 && payload[0] == 0x1F && payload[1] == 0x8B;

                Stream source = new MemoryStream(payload);
                if (gzipped)
                {
                    source = new System.IO.Compression.GZipStream(source, System.IO.Compression.CompressionMode.Decompress);
                }

                // Disposing the reader also disposes the wrapped stream(s).
                using (StreamReader reader = new StreamReader(source, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Existing contract: failures are reported through the return value.
                return ex.Message;
            }
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with a 30 second timeout and returns the body as
        /// text, un-gzipping it when the server compressed it.
        /// </summary>
        /// <remarks>
        /// The text encoding is taken from the <c>charset</c> of the Content-Type header when
        /// one is declared and known. Otherwise UTF-8 is used for gzip responses and the
        /// system default encoding for uncompressed ones.
        /// </remarks>
        /// <param name="url">Address to fetch.</param>
        /// <returns>The response body as text.</returns>
        /// <exception cref="WebException">The request failed, timed out or returned an error status.</exception>
        public string GetHtml(string url)
        {
            HttpWebRequest webRequest = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);
            webRequest.Timeout = 30000;
            webRequest.Method = "GET";
            webRequest.UserAgent = "Mozilla/4.0";
            // Advertise only what is actually decoded below; a "deflate" response would
            // otherwise be read as if it were plain text.
            webRequest.Headers.Add("Accept-Encoding", "gzip");

            using (HttpWebResponse webResponse = (System.Net.HttpWebResponse)webRequest.GetResponse())
            using (System.IO.Stream streamReceive = webResponse.GetResponseStream())
            {
                bool gzipped = string.Equals(webResponse.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase);

                // Encoding of the target site, taken from the Content-Type header.
                Encoding encoding = ResolveResponseEncoding(
                    webResponse.Headers["Content-Type"],
                    gzipped ? Encoding.UTF8 : Encoding.Default);

                // When GZip was used, decompress first.
                System.IO.Stream body = gzipped
                    ? new System.IO.Compression.GZipStream(streamReceive, System.IO.Compression.CompressionMode.Decompress)
                    : streamReceive;

                using (StreamReader sr = new System.IO.StreamReader(body, encoding))
                {
                    return sr.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Returns the encoding named by the <c>charset</c> parameter of a Content-Type
        /// header value, or <paramref name="fallback"/> when the header is missing, has no
        /// charset, or names an encoding this runtime does not know.
        /// </summary>
        private static Encoding ResolveResponseEncoding(string contentType, Encoding fallback)
        {
            if (string.IsNullOrEmpty(contentType))
            {
                return fallback;
            }

            Match charset = Regex.Match(contentType, "charset\\s*=\\s*[\\W]?\\s*([\\w-]+)", RegexOptions.IgnoreCase);
            if (!charset.Success)
            {
                return fallback;
            }

            try
            {
                return Encoding.GetEncoding(charset.Groups[1].Value.Trim());
            }
            catch (ArgumentException)
            {
                // Unknown or malformed charset name.
                return fallback;
            }
        }


        /// <summary>
        /// Issues a GET request and returns the response body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Address without query string.</param>
        /// <param name="postDataStr">
        /// Query string without the leading <c>?</c>; when null or empty the URL is used unchanged.
        /// </param>
        /// <returns>The response body as text.</returns>
        /// <exception cref="WebException">The request failed or the server returned an error status.</exception>
        public string HttpGet(string url,string postDataStr)
        {
            string target = string.IsNullOrEmpty(postDataStr) ? url : url + "?" + postDataStr;

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(target);
            request.Method = "GET";
            request.ContentType = "text/html;charset=UTF-8";

            // No catch-and-retry here: a request object is single-use, so calling
            // GetResponse() again on the same instance does not reissue the request.
            // Failures propagate to the caller.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding("utf-8")))
            {
                return reader.ReadToEnd();
            }
        }


        /// <summary>
        /// Sends a previously crawled text file to the client as a <c>text/plain</c> download.
        /// </summary>
        /// <remarks>
        /// <paramref name="path"/> comes from the request, so it is only honoured when it
        /// resolves to an existing file inside the application's <c>text</c> directory (the
        /// directory <c>Get_Data</c> writes to). Without that check any file under the
        /// application root, such as configuration files, could be downloaded.
        /// </remarks>
        /// <param name="path">Application-relative path, e.g. the value returned by <c>Get_Data</c> (<c>text/&lt;file&gt;.txt</c>).</param>
        /// <returns>The file result for the requested file.</returns>
        /// <exception cref="HttpException">404 when the path is blank, outside the <c>text</c> directory, or the file does not exist.</exception>
        public FileResult download(string path)
        {
            if (string.IsNullOrWhiteSpace(path))
            {
                throw new HttpException(404, "File not found.");
            }

            char separator = System.IO.Path.DirectorySeparatorChar;

            // Normalise both paths so that ".." segments cannot escape the allowed folder,
            // and make sure the root ends with a separator so "text2/..." does not match.
            string allowed_root = System.IO.Path.GetFullPath(Server.MapPath("~/text/")).TrimEnd(separator) + separator;
            string file_path = System.IO.Path.GetFullPath(Server.MapPath("~/" + path));

            bool inside_root = file_path.StartsWith(allowed_root, StringComparison.OrdinalIgnoreCase);
            if (!inside_root || !System.IO.File.Exists(file_path))
            {
                throw new HttpException(404, "File not found.");
            }

            string file_name = System.IO.Path.GetFileName(file_path);
            return File(file_path, "text/plain", file_name);
        }

    }
}